{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "PCA computation via SVD.\n",
    "Compares the PCA computed via SVD with\n",
    "direct computation using eigens of covariance\n",
    "matrix. Demonstrates that they are equal.\n",
    "\"\"\"\n",
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%run 4.3.2-common.ipynb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Principal components obtained via PCA:\n",
      "tensor([[-0.4445, -0.8958],\n",
      "        [-0.8958,  0.4445]])\n",
      "Principal components obtained via SVD:\n",
      "tensor([[-0.4445, -0.8958],\n",
      "        [-0.8958,  0.4445]])\n"
     ]
    }
   ],
   "source": [
    "N = 100\n",
    "\n",
    "torch.manual_seed(42)\n",
    "\n",
    "# We create a random feature vector\n",
    "x_0 = torch.normal(0, 100, (N,))\n",
    "\n",
    "# Then we create another feature vector which is\n",
    "# highly correlated with the previous feature:\n",
    "# x1 = 2 * x0.\n",
    "# We add random noise to this second feature.\n",
    "x_1 = 2 * x_0 + torch.normal(0, 20, (N,))\n",
    "\n",
    "# Create the data matrix with x0, x1 as columns\n",
    "X = torch.column_stack((x_0, x_1))\n",
    "\n",
    "# Perform PCA (directly as eigen of covariance\n",
    "# matrix)\n",
    "principal_values, principal_components = pca(X)\n",
    "principal_values = principal_values.real\n",
    "principal_components = principal_components.real\n",
    "\n",
    "sorted_indices = torch.argsort(-principal_values)\n",
    "print(\"Principal components obtained via PCA:\\n{}\"\\\n",
    "      .format(principal_components[:,\n",
    "                                   sorted_indices]))\n",
    "\n",
    "# Perform PCA via SVD following Mean subtraction\n",
    "X_mean = X - torch.mean(X, axis=0)\n",
    "U, S, V_t = torch.linalg.svd(X_mean)\n",
    "V = V_t.T\n",
    "\n",
    "# Columns of V are the principal components\n",
    "print(\"Principal components obtained via SVD:\\n{}\".\\\n",
    "      format(V))\n",
    "\n",
    "\n",
    "# Assert that principal components are the same\n",
    "for i in range(V.shape[1]):\n",
    "    # -V[:, i] is used as they can be in opposite\n",
    "    # direction\n",
    "    assert torch.allclose(\n",
    "               V[:, i],\n",
    "               principal_components[:, sorted_indices[i]])\\\n",
    "           or torch.allclose(\n",
    "               -V[:, i],\n",
    "               principal_components[:, sorted_indices[i]]) "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
